# 爬取百度图片的街拍
import requests
from urllib.parse import urlencode
from urllib.request import urlretrieve
import os
import time

# 爬取图片信息
def getPage(pn, timeout=10):
    """Fetch one page of Baidu image-search results for the keyword "街拍".

    Args:
        pn: Result offset sent as the ``pn`` query parameter. The request
            asks for 30 results at a time (``rn``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded JSON body when the server answers with HTTP 200 and the
        body is valid JSON; ``None`` for any other status code, for network
        errors, and for undecodable bodies.
    """
    # Query parameters for the acjson endpoint.
    params = {
        "tn": "resultjson_com",
        "ipn": "rj",
        "ct": 201326592,
        "is": "",
        "fp": "result",
        "queryWord": "街拍",
        "cl": 2,
        "lm": -1,
        # Charset names must not contain spaces ("utf - 8" is not a valid
        # encoding label).
        "ie": "utf-8",
        "oe": "utf-8",
        "adpicid": "",
        "st": -1,
        "z": "",
        "ic": 0,
        "hd": "",
        "latest": "",
        "copyright": "",
        "word": "街拍",
        "s": "",
        "se": "",
        "tab": "",
        "width": "",
        "height": "",
        "face": 0,
        "istype": 2,
        "qc": "",
        "nc": 1,
        "fr": "",
        "expermode": "",
        "pn": pn,
        "rn": 30,
        "gsm": "1e0",
        # Value-less numeric parameter; presumably a cache-busting timestamp
        # copied from a browser request -- confirm before changing it.
        "1545566598127": "",
    }
    url = "https://image.baidu.com/search/acjson?" + urlencode(params)
    try:
        res = requests.get(url, timeout=timeout)
        if res.status_code != 200:
            return None
        return res.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON: the
        # caller treats None as "no data for this page".
        return None

# 存储图片
def saveImage(page, item):
    """Download the image referenced by ``item`` into ``jiepai/<page>/``.

    Args:
        page: Page number; used as the name of the sub-directory.
        item: Mapping with an ``"imgurl"`` entry. Nothing is downloaded (and
            no directory is created) when the entry is missing or empty.

    The file is named after the last ``/``-separated segment of the URL.
    ``.jpg`` is appended only when that segment does not already end in a
    common image extension, so ``photo.jpg`` is no longer stored as
    ``photo.jpg.jpg``.

    Raises:
        Whatever ``urlretrieve`` raises when the download fails.
    """
    img_url = item.get("imgurl")
    if not img_url:
        return

    # makedirs also creates the parent "jiepai" directory and tolerates an
    # already existing target, so there is no check-then-create race.
    path = "jiepai/" + str(page)
    os.makedirs(path, exist_ok=True)

    file_name = img_url.split("/")[-1]
    known_extensions = (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp")
    if not file_name.lower().endswith(known_extensions):
        file_name += ".jpg"

    urlretrieve(img_url, path + "/" + file_name)

# 获取图片信息
def getImageData(res):
    """Yield ``{"imgurl": <url>}`` for each usable record in a search response.

    Args:
        res: Decoded response mapping whose ``"data"`` entry is a list of
            records, or ``None`` when the page could not be fetched.

    Yields:
        One dict per record that carries a non-empty ``"middleURL"``.
        Records without one (for example an empty dict) are skipped, and a
        missing response or missing ``"data"`` list yields nothing.
    """
    if not res:
        return
    for record in res.get("data") or ():
        img_url = record.get("middleURL") if record else None
        if img_url:
            yield {"imgurl": img_url}

# 主函数
def main(pn):
    """Crawl one result page and store its images under ``jiepai/<page>/``.

    Args:
        pn: Result offset of the page to crawl. Offsets advance in steps of
            30, so the 1-based page number is ``pn // 30 + 1``.

    When the page cannot be fetched a notice is printed and the page is
    skipped instead of crashing on the missing response.
    """
    # Make sure the root download directory exists (no error if it does).
    os.makedirs("jiepai", exist_ok=True)

    # The page number is the same for every image of this request.
    page = pn // 30 + 1

    data = getPage(pn)
    if data is None:
        print("第", page, "页获取失败")
        return

    # Extract every image URL and download it.
    for item in getImageData(data):
        print(item)
        saveImage(page, item)

if __name__ == "__main__":
    # Crawl the first five result pages; offsets advance by 30 per page and
    # each request is followed by a one-second pause.
    for page_index in range(5):
        print("第", page_index + 1, "页爬取")
        offset = page_index * 30
        main(offset)
        time.sleep(1)
